library(survival)
library(KMsurv)
library(ggplot2)
library(ggpubr)
library(survminer)
library(plotly)
library(muhaz)
library(ggthemes)
vars <- c("id", "clinic", "status", "time", "prison", "methadone")
addicts <-
  read.table(
    url(
      "https://dmrocke.ucdavis.edu/Class/BST222.2022.Fall/addicts.txt"
    ),
    header = F,
    col.names = vars
  )
#addicts

Question 1

#change variables to factors to be used in Cox PH
addicts$clinic <-
  factor(addicts$clinic, labels = c("Clinic1", "Clinic2"))
addicts$prison <- factor(addicts$prison, labels = c("No", "Yes"))

#head(addicts)

surv_object <- Surv(time = addicts$time, event = addicts$status)
addicts.cox.strat <-
  coxph(surv_object ~ strata(clinic) + prison + methadone, data = addicts)
summary(addicts.cox.strat)
## Call:
## coxph(formula = surv_object ~ strata(clinic) + prison + methadone, 
##     data = addicts)
## 
##   n= 238, number of events= 150 
## 
##                coef exp(coef)  se(coef)      z Pr(>|z|)    
## prisonYes  0.389605  1.476397  0.168930  2.306   0.0211 *  
## methadone -0.035115  0.965495  0.006465 -5.432 5.59e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## prisonYes    1.4764     0.6773    1.0603    2.0559
## methadone    0.9655     1.0357    0.9533    0.9778
## 
## Concordance= 0.651  (se = 0.026 )
## Likelihood ratio test= 33.91  on 2 df,   p=4e-08
## Wald test            = 32.66  on 2 df,   p=8e-08
## Score (logrank) test = 33.33  on 2 df,   p=6e-08

Question 2

addicts.zph.stra <- cox.zph(addicts.cox.strat)
addicts.zph.stra
##            chisq df    p
## prison    0.0264  1 0.87
## methadone 1.0552  1 0.30
## GLOBAL    1.1302  2 0.57
#For clinic
ggcoxzph(addicts.zph.stra[1], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-4,4))

#For prison
ggcoxzph(addicts.zph.stra[2], se = FALSE, font.main = 12, ggtheme = theme_solarized(), point.col = "#0096FF", ylim = c(-0.3,0.3))

We can see from the plot that the prison variable has a sinusoidal pattern of variation. Meanwhile, our methadone variable seems to have little evidence of a pattern, with a plot that shows random scatter.

Question 3

addicts.mart <- residuals(addicts.cox.strat, type = "martingale")

addicts.cs <- addicts$status - addicts.mart

#Cumulative hazard 
surv.csr <- survfit(Surv(addicts.cs, addicts$status) ~1, type = "fleming-harrington", data = addicts)

#plot(surv.csr, fun ="cumhaz")


cumHazPlot <-
  ggsurvplot(
    surv.csr,
    fun = "cumhaz",
    conf.int = TRUE,
    palette = c("#581845"),
    ggtheme = theme_solarized()
  ) + ggtitle("Cumulative Hazard for Cox-Snell Residuals")
#ggplotly(cumHazPlot[[1]])


ggplotly(cumHazPlot$plot + geom_abline())

We can see that after time about 2.5, our cumulative hazard does not follow our straight trend line. This gives us the impression that our model lacks in being a good fit for the data.

Question 4

#This time, without methadone
addicts.coxNoMeth <- coxph(surv_object ~ strata(clinic) + prison, data = addicts)
addicts.mart <- residuals(addicts.coxNoMeth, type = "martingale")


lowessOBJ <- as.data.frame(lowess(addicts$methadone, addicts.mart))

ggplotly(
  ggplot() + aes(addicts$methadone, addicts.mart) + geom_point(col = "#FFA000") + labs(x = "Methadone", y = "Martingale Residuals", title = "Martingale Residuals vs. Methadone Dosage") + geom_line(data = lowessOBJ, aes(x = x, y = y), col = "#388E3C") + theme_solarized()
)

Since our lowess line seems pretty linear, we would advise that a transformation of the data is not neccessary.

Question 5

# With Linear predictor
addicts.predict <- predict(addicts.cox.strat)
addicts.mart <- residuals(addicts.cox.strat, type = "martingale")

#Deviance and df beta
addicts.dev <- residuals(addicts.cox.strat, type = "deviance")
addicts.dfb <- residuals(addicts.cox.strat, type = "dfbeta")

#MArtingale vs Linear Predictor
ggplotly(
  ggplot() + aes(
    x = addicts.predict,
    y = addicts.mart,
    label = rownames(addicts)
  ) + geom_point() + labs(x = "Linear Predictor", y = "Martingale Residual", title = "Martingale Residuals vs Linear Predictor") + theme_solarized()
)
# Deviance vs Linear Predictor
ggplotly(
  ggplot() + aes(
    x = addicts.predict,
    y = addicts.dev,
    label = rownames(addicts)
  ) + geom_point() + labs(x = "Linear Predictor", y = "Deviance Residual", title = "Deviance Residuals vs Linear Predictor") + theme_solarized()
)
# DfBeta vs Linear Predictor
# Clinic
# ggplotly(
#   ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 1]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Clinic Type", title = "dfbeta Values for Observation Number by Clinic Type") + theme_solarized()
# )

# Prison
ggplotly(
  ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 1]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Prison Status", title = "dfbeta Values for Observation Number by Prison Status") + theme_solarized()
)
#Methadone
ggplotly(
  ggplot() + aes(x = as.numeric(rownames(addicts)), y = addicts.dfb[, 2]) + geom_point() + labs(x = "Observation Number", y = "dfbeta for Methadone Level", title = "dfbeta Values for Observation Number by Methadone Level") + theme_solarized()
)

Observations to Examine by Residuals and Influence

  • Martingale Residuals
    • 8,9, 12, 26, 54
  • Deviance Residuals
    • 8, 123, 156, 175
  • Prison Influence
    • 8, 9 ,12, 26
  • Methadone Level Influence
    • 26, 70, 84, 156
    From the residual plots, the most important pbservations to examine would be 8,9 26, 70, and 156. Let’s observe these two observations:
addicts[c(8, 9, 26, 70, 156),]
##      id  clinic status time prison methadone
## 8     8 Clinic1      0  796    Yes        60
## 9     9 Clinic1      1  892     No        50
## 26   27 Clinic1      0  566    Yes        45
## 70   75 Clinic1      0  905     No        80
## 156 181 Clinic2      1  216     No       100

Observations 8 and 26 were prisoners who were censored observations in our data. 70 was a censored observation who was not a prisoner, but had a very long time. Given their long survival times, they could have felt they no longer needed to be part of the study. These prisoners also had low methadone levels. Observations 9 and 156 were recorded as leaving the clinic. 156, had a very high methadone level however.